{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getDFile(dir):\n",
    "    \"\"\"Load every digit file in a directory into a DataFrame.\n",
    "\n",
    "    Each regular file is parsed with ``np.loadtxt(..., dtype=str)`` and its\n",
    "    tokens are concatenated into one string. The label is the part of the\n",
    "    file name before the first underscore.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    dir : str\n",
    "        Directory to scan (sub-directories are not descended into).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        One row per file, in sorted file-name order, with columns\n",
    "        ``imgs`` (concatenated file content) and ``labels`` (name prefix).\n",
    "    \"\"\"\n",
    "    imgs = []\n",
    "    labels = []\n",
    "    # os.listdir gives no ordering guarantee; sorting keeps row positions\n",
    "    # (and therefore positional lookups such as .iloc[10, 0]) reproducible.\n",
    "    for file_name in sorted(os.listdir(dir)):\n",
    "        path = os.path.join(dir, file_name)\n",
    "        # Skip directories such as .ipynb_checkpoints that np.loadtxt cannot read.\n",
    "        if not os.path.isfile(path):\n",
    "            continue\n",
    "        # ndmin=1 keeps a single-line file iterable instead of a 0-d array.\n",
    "        rows = np.loadtxt(path, dtype=str, ndmin=1)\n",
    "        imgs.append(''.join(rows))\n",
    "        labels.append(file_name.split('_')[0])\n",
    "\n",
    "    return pd.DataFrame({\n",
    "        \"imgs\": imgs,\n",
    "        \"labels\": labels\n",
    "    })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getDFiles(dirs):\n",
    "    \"\"\"Return a list with one getDFile() DataFrame per entry of dirs, in order.\"\"\"\n",
    "    return [getDFile(folder) for folder in dirs]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# getDFiles returns its frames in the order of the directory list.\n",
    "test, train = getDFiles([\"testDigits\", \"trainingDigits\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "160"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A=\"0000000000000110000000000000000000000000000011111100000000000000000000000001111111110000000000000000000001111111111100000000000000000001111111111111100000000000000000001111111000111100000000000000000111111000000111000000000000000001111110000001110000000000000000111111000000011100000000000000001111110000000111100000000000000011111100000000011100000000000000111111000000000111000000000000001111100000000000111000000000000011111000000000001110000000000000011111000000000001110000000000000111110000000000011100000000000001111100000000000111000000000000111110000000000001110000000000001111100000000000011100000000000000111100000000000011100000000000001111000000000001111000000000000011110000000000011110000000000000111100000000001111100000000000000111100000000001111100000000000001111100000000111110000000000000011111000000011111100000000000000111110000001111110000000000000001111110001111111100000000000000000111111111111110000000000000000001111111111111000000000000000000001111111111000000000000000000000000111110000000000000\"\n",
    "\n",
    "B=\"0000000000000001100000000000000000000000000111111110000000000000000000000011111111111000000000000000000000111111111111000000000000000000011111111111111000000000000000000111111000111110000000000000000011111000000111100000000000000000111110000001111100000000000000001111100000001111100000000000000111111000000011111000000000000001111110000000011111000000000000011111100000000011110000000000000111111000000000111110000000000001111100000000001111000000000000011110000000000011110000000000000111100000000000111100000000000001111000000000000111000000000000001111000000000001110000000000000011110000000000011100000000000000111100000000000111000000000000011110000000000111100000000000000111100000000001111000000000000000111000000000011110000000000000001111100000111111100000000000000011111000111111110000000000000000111111111111111000000000000000000111111111111110000000000000000011111111111110000000000000000000011111111111000000000000000000000011111110000000000000000000000000111110000000000000000000000000000100000000000000000000\"\n",
    "\n",
    "\n",
    "def hamm(A,B):\n",
    "    \"\"\"Count the positions at which A and B hold different items.\n",
    "\n",
    "    The two sequences are paired with zip(), so when their lengths differ\n",
    "    only the first min(len(A), len(B)) positions are compared.\n",
    "    \"\"\"\n",
    "    mismatch_flags = (left != right for left, right in zip(A, B))\n",
    "    return sum(mismatch_flags)\n",
    "\n",
    "# Distance between the two sample strings defined above.\n",
    "hamm(A, B)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "174"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distance between the image in row 10 of `test` and the image in row 30 of `train`.\n",
    "hamm(test.iloc[10, 0], train.iloc[30, 0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def knn(inX,df,k ):\n",
    "    \"\"\"Predict a label for inX by majority vote among its k nearest rows.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    inX : sequence (a string in this notebook)\n",
    "        Sample to classify; compared with hamm() against every reference row.\n",
    "    df : pandas.DataFrame\n",
    "        Reference set: first column holds the samples, last column the labels.\n",
    "    k : int\n",
    "        Number of nearest rows that vote; must be at least 1.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The label that occurs most often among the k rows with the smallest\n",
    "    distance (first entry of value_counts on those rows).\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If k is smaller than 1 or df has no rows.\n",
    "    \"\"\"\n",
    "    # A non-positive k would either fail later with an opaque IndexError (k=0)\n",
    "    # or silently slice the wrong neighbourhood (negative k), so reject it early.\n",
    "    if k < 1:\n",
    "        raise ValueError(f'k must be at least 1, got {k!r}')\n",
    "    if len(df) == 0:\n",
    "        raise ValueError('df must contain at least one labelled row')\n",
    "\n",
    "    dist = df.iloc[:, 0].apply(lambda x: hamm(inX, x))\n",
    "\n",
    "    dist_l = pd.DataFrame({\n",
    "        'dist': dist,\n",
    "        'label': df.iloc[:, -1]\n",
    "    })\n",
    "\n",
    "    # mergesort is stable: rows with equal distance keep their original order,\n",
    "    # so the set of k neighbours is deterministic when ties straddle the cut.\n",
    "    dist_k = dist_l.sort_values(by=\"dist\", kind=\"mergesort\").iloc[:k]\n",
    "\n",
    "    return dist_k.value_counts('label').index[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'0'"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Label for sample A, voted by its 3 nearest rows in `test`.\n",
    "knn(A, test, k=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'0'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Label for sample A, voted by its 3 nearest rows in `test`.\n",
    "knn(A, test, k=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def digitsTest(test,train,k):\n",
    "    \"\"\"Classify every row of test with knn() and return the hit rate.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    test : pandas.DataFrame\n",
    "        Rows to classify: first column holds the samples, last column the\n",
    "        expected labels.\n",
    "    train : pandas.DataFrame\n",
    "        Reference set handed to knn() unchanged.\n",
    "    k : int\n",
    "        Number of neighbours handed to knn().\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    Fraction of rows whose predicted label equals the expected label\n",
    "    (np.mean of the element-wise comparison).\n",
    "    \"\"\"\n",
    "    # Iterate the sample column directly: row[0] on an iterrows() Series is a\n",
    "    # positional lookup on a label index, which newer pandas deprecates.\n",
    "    predicted = [knn(sample, train, k) for sample in test.iloc[:, 0]]\n",
    "    expected = test.iloc[:, -1]\n",
    "\n",
    "    return np.mean(expected == predicted)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9894291754756871"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Share of `test` rows labelled correctly by 3-nearest-neighbour lookup in `train`.\n",
    "digitsTest(test, train, k=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'0'"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Label for sample B, voted by its 3 nearest rows in `test`.\n",
    "knn(B, test, k=3)"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "a6dc62afd8b03c17538a9dfce2fcb18f62cec380cc7b77050462a64b7e4e4814"
  },
  "kernelspec": {
   "display_name": "Python 3.8.0 32-bit",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
